# Install patchwork (used below to compose the two QC heatmaps with the `/` operator)
install.packages(c("patchwork"))
Installing patchwork [1.1.1] ...
OK [copied local binary]
# Attach all analysis dependencies quietly.
# Load order is kept as-is: later packages may mask earlier ones
# (e.g. MOFA2/Bioconductor generics vs tidyverse verbs).
suppressPackageStartupMessages({
library(tidyverse)
library(MOFA2)
library(Matrix)
library(SingleCellExperiment)
library(scran)
library(glue)
library(scater)
library(patchwork)
library(batchelor)
library(rhdf5)
# library(ggraph)
}
)
Define plotting utils
# Return a ggplot2 theme fragment that strips all x-axis text, ticks and title.
remove_x_axis <- function() {
  blank <- element_blank()
  theme(axis.text.x = blank, axis.ticks.x = blank, axis.title.x = blank)
}
# Return a ggplot2 theme fragment that strips all y-axis text, ticks and title.
remove_y_axis <- function() {
  blank <- element_blank()
  theme(axis.text.y = blank, axis.ticks.y = blank, axis.title.y = blank)
}
# Read organ -> color lookup table.
# show_col_types = FALSE quiets readr's column-specification message
# (the console output itself suggested this option).
org_colors <- read_csv("~/Pan_fetal_immune/metadata/organ_colors.csv", show_col_types = FALSE)
New names:
* `` -> ...1
Rows: 9 Columns: 3
── Column specification ──────────────────────────────────────────────────────
Delimiter: ","
chr (2): organ, color
dbl (1): ...1
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Convert the lookup table to a named vector: names = organ, values = hex color
org_colors <- setNames(org_colors$color, org_colors$organ)
figdir <- "~/mount/gdrive/Pan_fetal/Updates_and_presentations/figures/MOFA_analysis/"
# recursive = TRUE also creates any missing parent directories
# (dir.create() without it fails silently-with-warning when parents are absent)
if (!dir.exists(figdir)){ dir.create(figdir, recursive = TRUE) }
## Load pseudobulk expression data for the chosen lineage split
split <- "LYMPHOID"
indir <- glue("/nfs/team205/ed6/data/Fetal_immune/LMM_data/LMM_input_{split}_PBULK/")
# Renamed from `matrix` to avoid shadowing base::matrix()
count_mat <- readMM(file = paste0(indir, "matrix.mtx.gz"))
# Sample-level metadata; column "X" holds the sample identifiers
coldata <- read.csv(file = paste0(indir, "metadata.csv.gz")) %>%
column_to_rownames("X")
rowdata <- read.csv(file = paste0(indir, "gene.csv.gz"))
## Make SingleCellExperiment obj
# Matrix on disk is samples x genes -> transpose to genes x samples for SCE
sce <- SingleCellExperiment(list(logcounts = t(count_mat)), colData = coldata)
# make.unique() guards against duplicated gene symbols
rownames(sce) <- make.unique(rowdata$GeneName)
## Plot number of cells per organ/celltype pair
n_cells_heatmap <- data.frame(colData(sce)) %>%
group_by(anno_lvl_2_final_clean, organ) %>%
# .groups = "drop" returns an ungrouped frame and silences the
# "`summarise()` has grouped output" message seen in the console
summarise(n_cells=sum(n_cells), .groups = "drop") %>%
ggplot(aes(anno_lvl_2_final_clean, organ)) +
geom_tile(aes(fill=log10(n_cells))) +
geom_text(aes(label=n_cells), color="white") +
scale_fill_viridis_c() +
theme_classic(base_size = 16) +
xlab("celltype") +
# reuse the helper defined above instead of repeating the theme() call inline
remove_x_axis()
`summarise()` has grouped output by 'anno_lvl_2_final_clean'. You can override using the `.groups` argument.
## Plot number of pseudobulk samples per organ/celltype pair
n_samples_heatmap <- data.frame(colData(sce)) %>%
group_by(anno_lvl_2_final_clean, organ) %>%
# .groups = "drop" silences the "grouped output" message and drops grouping
summarise(n_samples=n(), .groups = "drop") %>%
ggplot(aes(anno_lvl_2_final_clean, organ)) +
geom_tile(aes(fill=n_samples)) +
geom_text(aes(label=n_samples), color="white") +
scale_fill_viridis_c(option="cividis") +
theme_classic(base_size = 16) +
xlab("celltype") +
# keep rotated x labels on the bottom panel (shared x axis with the plot above)
theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5))
`summarise()` has grouped output by 'anno_lvl_2_final_clean'. You can override using the `.groups` argument.
# patchwork `/` operator: stack the cell-count heatmap above the sample-count heatmap
n_cells_heatmap / n_samples_heatmap
anno_groups
$MYELOID
[1] "DC3" "DC2" "DC1"
[4] "PROLIFERATING_MACROPHAGE" "EO/BASO/MAST" "CD14_MONO"
[7] "CD14+_MACROPHAGE" "OLFML3+_MICROGLIA" "PROMONOCYTE_(PROLIFERATING)"
[10] "CD16+_MACROPHAGE" "NEUTROPHIL" "PROMONOCYTE"
[13] "YS_MACROPHAGE" "BM_CD14_MONO" "KUPFFER_RP_MACROPHAGE"
[16] "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "YS_ERY_MACROPHAGE" "SPLENIC_MACROPHAGE"
[19] "OSTEOCLAST"
$`ERYTHROID CELLS`
[1] "EARLY_ERY" "EARLY_MK" "PROMYELOCYTE" "ERY_MACROPHAGE" "MID_ERY" "LATE_ERY" "LATE_MK"
[8] "YS_ERY" "CYCLING_YS_ERY"
$`B CELLS`
[1] "LARGE_PRE_B" "PRE_PRO_B" "PRO_B" "CYCLING_B" "SMALL_PRE_B" "MATURE_B" "B1" "IMMATURE_B" "LATE_PRO_B"
[10] "PLASMA_B"
$OTHER
[1] "DOUBLET_IMMUNE_FIBROBLAST" "LOW_Q_INCONSISTENT" "DOUBLET_LYMPHOID_MACROPHAGE"
[4] "LOW_QUALITY" "HIGH_MITO" "HIGH_MITO_DC"
[7] "DOUBLETS_FIBRO_ERY" "DOUBLET_ENDOTHELIUM_ERYTHROCYTE" "DOUBLET_ERY_B"
[10] "LOW_QUALITY_MACROPHAGE" "LOW_QUALITY_MID_ERY_(HIGH_RIBO)" "PLACENTAL_CONTAMINANTS"
[13] "DOUBLET"
$PROGENITORS
[1] "MEMP" "GMP" "LMPP_ELP" "HSC_MPP" "MEP" "CMP" "CYCLING_MEMP" "CYCLING_MPP"
$STROMA
[1] "ENDOTHELIUM_CLUST11" "VSMC/PERICYTE" "ENDOTHELIUM_CLUST42"
[4] "CYCLING_FIBROBLAST_CLUST15" "SKIN_FIBROBLAST_CLUST1" "PERIVASCULAR_MACROPHAGE"
[7] "MELANOCYTE" "MYOFIBROBLAST" "SKIN_FIBROBLAST_CLUST8"
[10] "CYCLING_FIBROBLAST_CLUST17" "KERATINOCYTE" "SKIN_FIBROBLAST_CLUST25"
[13] "SKIN_FIBROBLAST_CLUST2" "GLIAL" "SKIN_FIBROBLAST_CLUST22"
[16] "ENDOTHELIUM_CLUST9" "VSMC" "ENDOTHELIUM_CLUST5"
[19] "MESOTHELIUM" "SPLENIC_FIBROBLAST_CLUST26" "SPLENIC_FIBROBLAST_CLUST0"
[22] "GUT_FIBROBLAST_CLUST4" "HEPATIC_VSMC/PERICYTE" "GUT_FIBROBLAST_CLUST27"
[25] "SMOOTH_MUSCLE" "NEURON" "GUT_FIBROBLAST_CLUST6"
[28] "HEPATOCYTE-LIKE" "YS_STROMA" "FIBROBLAST_CLUST12"
[31] "HEPATOCYTE_CLUST33" "HEPATOCYTE_CLUST16" "GUT_EPITHELIUM_CLUST32"
[34] "CYCLING_EPITHELIUM" "GUT_EPITHELIUM_CLUST10" "EC"
[37] "DEVELOPING_NEPHRON_CLUST21" "DEVELOPING_NEPHRON_CLUST38" "ENTEROENDOCRINE_CLUST46"
[40] "GUT_MYOFIBROBLAST" "FIBROBLAST_CLUST31" "INTERSTITIAL_CELLS_OF_CAJAL"
[43] "KIDNEY_VSMC/PERICYTE" "MESENCHYMAL_LYMPHOID_TISSUE_ORGANISER" "ENTEROENDOCRINE_CLUST52"
[46] "SKIN_FIBROBLAST_CLUST29" "SKIN_FIBROBLAST_CLUST24" "SKIN_FIBROBLAST_CLUST30"
[49] "MUSCLE_SATELLITE" "OSTEOBLAST" "ENDOTHELIUM_CLUST45"
[52] "SKELETAL_MUSCLE" "FIBROBLAST_CLUST53" "CHONDROCYTE"
$`NK/T CELLS`
[1] "CYCLING_T" "CD4+T" "CD8+T" "TREG" "NK" "CYCLING_NK" "NK_T" "DP(P)_T" "TH17"
[10] "CD8AA" "ABT(ENTRY)" "DP(Q)_T" "DN(P)_T" "DN(early)_T" "DN(Q)_T"
$PDC
[1] "pDC"
$ILC
[1] "ILC3" "ILC2" "CYCLING_ILC"
## Plot number of cells per organ/celltype pair
n_cells_heatmap <- data.frame(colData(sce)) %>%
group_by(anno_lvl_2_final_clean, organ) %>%
# .groups = "drop" silences the "grouped output" message and drops grouping
summarise(n_cells=sum(n_cells), .groups = "drop") %>%
ggplot(aes(anno_lvl_2_final_clean, organ)) +
geom_tile(aes(fill=log10(n_cells))) +
geom_text(aes(label=n_cells), color="white") +
scale_fill_viridis_c() +
theme_classic(base_size = 16) +
xlab("celltype") +
# reuse the helper defined at the top of the session
remove_x_axis()
`summarise()` has grouped output by 'anno_lvl_2_final_clean'. You can override using the `.groups` argument.
## Plot number of pseudobulk samples per organ/celltype pair
n_samples_heatmap <- data.frame(colData(sce)) %>%
group_by(anno_lvl_2_final_clean, organ) %>%
# .groups = "drop" silences the "grouped output" message and drops grouping
summarise(n_samples=n(), .groups = "drop") %>%
ggplot(aes(anno_lvl_2_final_clean, organ)) +
geom_tile(aes(fill=n_samples)) +
geom_text(aes(label=n_samples), color="white") +
scale_fill_viridis_c(option="cividis") +
theme_classic(base_size = 16) +
xlab("celltype") +
# keep rotated x labels on the bottom panel (shared x axis with the plot above)
theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5))
`summarise()` has grouped output by 'anno_lvl_2_final_clean'. You can override using the `.groups` argument.
# patchwork `/` operator: stack the two QC heatmaps vertically
n_cells_heatmap / n_samples_heatmap
## Feature selection w scran WITHIN CELLTYPE
# Split pseudobulk sample names by cell-type annotation, model the
# mean-variance trend within each cell type, and take the union of the
# top 1000 highly-variable genes across cell types.
anno_groups <- split(colnames(sce), sce$anno_lvl_2_final_clean)
hvgs_per_group <- lapply(anno_groups, function(cells) {
  getTopHVGs(modelGeneVar(sce[, cells]), n = 1000)
})
all_hvgs <- Reduce(union, hvgs_per_group, c())
## Drop genes with zero total expression across all pseudobulk samples
keep_genes <- which(rowSums(logcounts(sce)) > 0)
sce <- sce[keep_genes, ]
sce
class: SingleCellExperiment
dim: 28260 993
metadata(0):
assays(1): logcounts
rownames(28260): TSPAN6 TNMD ... AP000646.1 AP006216.3
rowData names(0):
colnames(993): F45_SK_CD45P_FCAImmP7579224-F45-SK-CD4+T-12-5GEX F45_SK_CD45P_FCAImmP7579224-F45-SK-CD8+T-12-5GEX ...
F50_SP_CD45P_FCAImmP7803020-F50-SP-IMMATURE_B-15-5GEX F30_TH_CD45N_FCAImmP7277565-F30-TH-ABT(ENTRY)-14-3GEX
colData names(7): Sample donor ... method n_cells
reducedDimNames(0):
altExpNames(0):
EDA with PCA
# PCA on scaled HVG expression (uncorrected logcounts), 30 components
sce <- runPCA(sce, scale=TRUE, ncomponents=30,
exprs_values = "logcounts", subset_row=all_hvgs)
# ## Variance explained
# percent.var <- attr(reducedDim(sce), "percentVar")
# plot(percent.var, log="y", xlab="PC", ylab="Variance explained (%)")
# Visual check for technical structure (donor, 3'/5' chemistry) vs biology (organ)
# in the leading PCs
plotPCA(sce, colour_by="donor", ncomponents=6)
plotPCA(sce, colour_by="method", ncomponents=6)
plotPCA(sce, colour_by="organ", ncomponents=10)
Minimize obvious technical effects (3GEX/5GEX, donor) using linear regression (following procedure from OSCA)
## Regress technical effects
# Two sequential passes of batchelor::regressBatches; the second pass operates on
# the residuals of the first, so statement order here matters.
# Pass 1: remove donor and chemistry (3'/5' GEX) effects from logcounts.
design <- model.matrix(~donor+method,data=colData(sce))
residuals <- regressBatches(sce, assay.type = "logcounts", design = design)
# Reorder corrected columns to match sce before storing as a new assay
assay(sce, "corrected_logcounts") <- as.matrix(assay(residuals[,colnames(sce)], "corrected"))
## Regress organ (soup effect)
# Pass 2: remove organ-level ambient RNA ("soup") from the already-corrected assay.
design <- model.matrix(~organ,data=colData(sce)) ## Include organ term to capture soup
residuals <- regressBatches(sce, assay.type = "corrected_logcounts", design = design)
assay(sce, "corrected_logcounts") <- as.matrix(assay(residuals[,colnames(sce)], "corrected"))
Check regression has an effect repeating PCA
# Re-run PCA on the corrected assay to confirm technical effects were removed
sce <- runPCA(sce, scale=TRUE, ncomponents=30, exprs_values = "corrected_logcounts")
# After correction, method/donor should no longer dominate the leading PCs
plotPCA(sce, colour_by="method", ncomponents=6)
plotPCA(sce, colour_by="donor", ncomponents=6)
plotPCA(sce, colour_by="organ", ncomponents=8)
## Feature selection w scran WITHIN CELLTYPE
# Repeat per-cell-type HVG selection, this time on the batch-corrected assay,
# and take the union of the top 1000 HVGs across cell types.
anno_groups <- split(colnames(sce), sce$anno_lvl_2_final_clean)
hvgs_per_group <- lapply(anno_groups, function(cells) {
  getTopHVGs(modelGeneVar(sce[, cells], assay.type = "corrected_logcounts"), n = 1000)
})
all_hvgs <- Reduce(union, hvgs_per_group, c())
Make MOFA object (Use celltypes as grouping covariate)
# Build the MOFA object on HVGs only, one group per cell type.
# MOFA2 1.0.1 does not support `extract_metadata` (it raised
# "unused argument (extract_metadata = TRUE)") -- drop that argument.
mofa <- create_mofa_from_SingleCellExperiment(sce[all_hvgs,], assay = "corrected_logcounts",
groups = "anno_lvl_2_final_clean")
# Downstream prepare/run steps refer to the untrained model as `object`
object <- mofa
Error in create_mofa_from_SingleCellExperiment(sce[all_hvgs, ], assay = "corrected_logcounts", :
unused argument (extract_metadata = TRUE)
sessionInfo()
R version 4.0.4 (2021-02-15)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.1 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
locale:
[1] LC_CTYPE=en_US.utf8 LC_NUMERIC=C LC_TIME=en_US.utf8 LC_COLLATE=en_US.utf8 LC_MONETARY=en_US.utf8
[6] LC_MESSAGES=en_US.utf8 LC_PAPER=en_US.utf8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.utf8 LC_IDENTIFICATION=C
attached base packages:
[1] stats graphics grDevices datasets utils methods base
other attached packages:
[1] rhdf5_2.34.0 glue_1.4.2 Matrix_1.3-2 MOFA2_1.0.1 forcats_0.5.1 stringr_1.4.0 dplyr_1.0.7 purrr_0.3.4
[9] readr_2.0.0 tidyr_1.1.3 tibble_3.1.3 ggplot2_3.3.5 tidyverse_1.3.1
loaded via a namespace (and not attached):
[1] MatrixGenerics_1.2.1 httr_1.4.2 jsonlite_1.7.2 modelr_0.1.8 assertthat_0.2.1 BiocManager_1.30.16
[7] stats4_4.0.4 renv_0.12.5 cellranger_1.1.0 ggrepel_0.9.1 corrplot_0.90 pillar_1.6.2
[13] backports_1.2.1 lattice_0.20-41 reticulate_1.20 RColorBrewer_1.1-2 rvest_1.0.1 colorspace_2.0-2
[19] plyr_1.8.6 cowplot_1.1.1 pkgconfig_2.0.3 pheatmap_1.0.12 broom_0.7.9 haven_2.4.3
[25] scales_1.1.1 HDF5Array_1.18.1 Rtsne_0.15 tzdb_0.1.2 generics_0.1.0 IRanges_2.24.1
[31] ellipsis_0.3.2 withr_2.4.2 BiocGenerics_0.36.1 cli_3.0.1 magrittr_2.0.1 crayon_1.4.1
[37] readxl_1.3.1 fs_1.5.0 fansi_0.5.0 xml2_1.3.2 tools_4.0.4 hms_1.1.0
[43] lifecycle_1.0.0 matrixStats_0.60.0 basilisk.utils_1.2.2 Rhdf5lib_1.12.1 S4Vectors_0.28.1 munsell_0.5.0
[49] reprex_2.0.1 DelayedArray_0.16.3 compiler_4.0.4 rlang_0.4.11 grid_4.0.4 rhdf5filters_1.2.1
[55] rstudioapi_0.13 rappdirs_0.3.3 basilisk_1.2.1 gtable_0.3.0 DBI_1.1.1 reshape2_1.4.4
[61] R6_2.5.0 lubridate_1.7.10 knitr_1.33 uwot_0.1.10 utf8_1.2.2 filelock_1.0.2
[67] stringi_1.7.3 parallel_4.0.4 Rcpp_1.0.7 vctrs_0.3.8 png_0.1-7 dbplyr_2.1.1
[73] tidyselect_1.1.1 xfun_0.25
Prepare for training
# Configure MOFA training options starting from package defaults
data_opts <- get_default_data_options(object)
# float32 halves memory and speeds up training
data_opts$use_float32 <- TRUE
# keep group means: we want to compare factor values across cell-type groups
data_opts$center_groups <- FALSE
object@data_options <- data_opts
model_opts <- get_default_model_options(object)
model_opts$num_factors <- 30
# model_opts$ard_factors <- FALSE
train_opts <- get_default_training_options(object)
# fixed seed for reproducibility
train_opts$seed <- 2020
train_opts$convergence_mode <- "medium" # use "fast" for faster training
train_opts$stochastic <- FALSE
# mefisto_opts <- get_default_mefisto_options(object)
# mefisto_opts$warping <- FALSE
# mefisto_opts$sparseGP <- TRUE
# Validate options and attach them to the untrained model
object <- prepare_mofa(
object = object,
data_options = data_opts,
model_options = model_opts,
training_options = train_opts
)
# Multi-group mode requested.
This is an advanced option, if this is the first time that you are running MOFA, we suggest that you try do some exploration first without specifying groups. Two important remarks:
- The aim of the multi-group framework is to identify the sources of variability *within* the groups. If your aim is to find a factor that 'separates' the groups, you DO NOT want to use the multi-group framework. Please see the FAQ on the MOFA2 webpage.
- It is important to account for the group effect before selecting highly variable features (HVFs). We suggest that either you calculate HVFs per group and then take the union, or regress out the group effect before HVF selection
Checking data options...
Due to string size limitations in the HDF5 format, sample names will be trimmed to less than 50 characters
Checking training options...
Checking model options...
object
Untrained MOFA model with the following characteristics:
Number of views: 1
Views names: corrected_logcounts
Number of features (per view): 7300
Number of groups: 23
Groups names: ABT(ENTRY) B1 CD4+T CD8+T CD8AA CYCLING_MPP CYCLING_NK CYCLING_T HSC_MPP ILC3 IMMATURE_B LARGE_PRE_B LATE_PRO_B LMPP_ELP MATURE_B MEMP NK NK_T PRE_PRO_B PRO_B SMALL_PRE_B TH17 TREG
Number of samples (per group): 26 32 63 55 24 28 61 38 32 62 29 54 32 10 55 26 87 52 40 50 46 43 48
Wrapped in run_mofa.R
# Train the model (delegates to the mofapy2 python package) and save to HDF5
outfile <- glue('{indir}{split}_mofa_model_oneview_organCorrected.hdf5')
mofa_trained <- run_mofa(object, outfile = outfile)
Warning: Output file /nfs/team205/ed6/data/Fetal_immune/LMM_data/LMM_input_LYMPHOID_PBULK/LYMPHOID_mofa_model_oneview_organCorrected.hdf5 already exists, it will be replaced
Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)...
Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'
#########################################################
### __ __ ____ ______ ###
### | \/ |/ __ \| ____/\ _ ###
### | \ / | | | | |__ / \ _| |_ ###
### | |\/| | | | | __/ /\ \_ _| ###
### | | | | |__| | | / ____ \|_| ###
### |_| |_|\____/|_|/_/ \_\ ###
### ###
#########################################################
use_float32 set to True: replacing float64 arrays by float32 arrays to speed up computations...
Successfully loaded view='corrected_logcounts' group='ABT(ENTRY)' with N=26 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='B1' with N=32 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='CD4+T' with N=63 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='CD8+T' with N=55 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='CD8AA' with N=24 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='CYCLING_MPP' with N=28 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='CYCLING_NK' with N=61 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='CYCLING_T' with N=38 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='HSC_MPP' with N=32 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='ILC3' with N=62 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='IMMATURE_B' with N=29 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='LARGE_PRE_B' with N=54 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='LATE_PRO_B' with N=32 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='LMPP_ELP' with N=10 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='MATURE_B' with N=55 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='MEMP' with N=26 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='NK' with N=87 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='NK_T' with N=52 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='PRE_PRO_B' with N=40 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='PRO_B' with N=50 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='SMALL_PRE_B' with N=46 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='TH17' with N=43 samples and D=7300 features...
Successfully loaded view='corrected_logcounts' group='TREG' with N=48 samples and D=7300 features...
Model options:
- Automatic Relevance Determination prior on the factors: True
- Automatic Relevance Determination prior on the weights: True
- Spike-and-slab prior on the factors: False
- Spike-and-slab prior on the weights: True
Likelihoods:
- View 0 (corrected_logcounts): gaussian
Warning: some group(s) have less than 15 samples, MOFA won't be able to learn meaningful factors for these group(s)...
######################################
## Training the model with seed 2020 ##
######################################
ELBO before training: -132488757.08
Iteration 1: time=10.91, ELBO=297780.79, deltaELBO=132786537.870 (100.22475929%), Factors=30
Iteration 2: time=11.01, Factors=30
Iteration 3: time=11.05, Factors=30
Iteration 4: time=10.97, Factors=30
Iteration 5: time=10.95, Factors=30
Iteration 6: time=11.00, ELBO=2908121.07, deltaELBO=2610340.279 (1.97023531%), Factors=30
Iteration 7: time=10.82, Factors=30
Iteration 8: time=10.70, Factors=30
Iteration 9: time=10.70, Factors=30
Iteration 10: time=10.80, Factors=30
Iteration 11: time=10.69, ELBO=3064916.38, deltaELBO=156795.305 (0.11834612%), Factors=30
Iteration 12: time=10.86, Factors=30
Iteration 13: time=10.77, Factors=30
Iteration 14: time=10.67, Factors=30
Iteration 15: time=11.03, Factors=30
Iteration 16: time=11.07, ELBO=3107186.46, deltaELBO=42270.084 (0.03190466%), Factors=30
Iteration 17: time=10.80, Factors=30
Iteration 18: time=11.07, Factors=30
Iteration 19: time=11.09, Factors=30
Iteration 20: time=10.95, Factors=30
Iteration 21: time=11.05, ELBO=3126530.89, deltaELBO=19344.434 (0.01460081%), Factors=30
Iteration 22: time=11.07, Factors=30
Iteration 23: time=11.02, Factors=30
Iteration 24: time=11.01, Factors=30
Iteration 25: time=11.02, Factors=30
Iteration 26: time=11.03, ELBO=3145472.08, deltaELBO=18941.185 (0.01429645%), Factors=30
Iteration 27: time=10.66, Factors=30
Iteration 28: time=10.77, Factors=30
Iteration 29: time=10.60, Factors=30
Iteration 30: time=10.82, Factors=30
Iteration 31: time=10.81, ELBO=3158977.22, deltaELBO=13505.138 (0.01019342%), Factors=30
Iteration 32: time=10.83, Factors=30
Iteration 33: time=10.76, Factors=30
Iteration 34: time=10.68, Factors=30
Iteration 35: time=10.82, Factors=30
Iteration 36: time=10.76, ELBO=3161387.65, deltaELBO=2410.430 (0.00181935%), Factors=30
Iteration 37: time=10.74, Factors=30
Iteration 38: time=10.87, Factors=30
Iteration 39: time=10.59, Factors=30
Iteration 40: time=10.80, Factors=30
Iteration 41: time=11.00, ELBO=3192915.90, deltaELBO=31528.249 (0.02379692%), Factors=30
Iteration 42: time=11.02, Factors=30
Iteration 43: time=10.59, Factors=30
Iteration 44: time=10.87, Factors=30
Iteration 45: time=10.95, Factors=30
Iteration 46: time=10.95, ELBO=3285178.21, deltaELBO=92262.317 (0.06963785%), Factors=30
Iteration 47: time=10.82, Factors=30
Iteration 48: time=11.08, Factors=30
Iteration 49: time=10.97, Factors=30
Iteration 50: time=10.82, Factors=30
Iteration 51: time=10.65, ELBO=3482792.60, deltaELBO=197614.387 (0.14915559%), Factors=30
Iteration 52: time=10.75, Factors=30
Iteration 53: time=11.60, Factors=30
Iteration 54: time=12.39, Factors=30
Iteration 55: time=19.86, Factors=30
Iteration 56: time=41.79, ELBO=3502687.97, deltaELBO=19895.369 (0.01501665%), Factors=30
Iteration 57: time=119.68, Factors=30
Iteration 58: time=102.59, Factors=30
Iteration 59: time=19.51, Factors=30
Iteration 60: time=10.35, Factors=30
Iteration 61: time=13.20, ELBO=3508492.11, deltaELBO=5804.146 (0.00438086%), Factors=30
Iteration 62: time=37.62, Factors=30
Iteration 63: time=74.94, Factors=30
Iteration 64: time=75.59, Factors=30
Iteration 65: time=39.72, Factors=30
Iteration 66: time=11.14, ELBO=3511429.39, deltaELBO=2937.276 (0.00221700%), Factors=30
Iteration 67: time=10.97, Factors=30
Iteration 68: time=10.75, Factors=30
Iteration 69: time=10.74, Factors=30
Iteration 70: time=10.66, Factors=30
Iteration 71: time=10.69, ELBO=3513329.29, deltaELBO=1899.897 (0.00143401%), Factors=30
Iteration 72: time=10.65, Factors=30
Iteration 73: time=10.83, Factors=30
Iteration 74: time=10.55, Factors=30
Iteration 75: time=10.67, Factors=30
Iteration 76: time=10.52, ELBO=3514696.51, deltaELBO=1367.217 (0.00103195%), Factors=30
Iteration 77: time=10.65, Factors=30
Iteration 78: time=10.71, Factors=30
Iteration 79: time=10.82, Factors=30
Iteration 80: time=10.77, Factors=30
Iteration 81: time=10.91, ELBO=3515690.31, deltaELBO=993.803 (0.00075010%), Factors=30
Iteration 82: time=10.84, Factors=30
Iteration 83: time=10.83, Factors=30
Iteration 84: time=10.80, Factors=30
Iteration 85: time=10.85, Factors=30
Iteration 86: time=10.95, ELBO=3516432.18, deltaELBO=741.873 (0.00055995%), Factors=30
Iteration 87: time=10.86, Factors=30
Iteration 88: time=10.90, Factors=30
Iteration 89: time=10.77, Factors=30
Iteration 90: time=10.70, Factors=30
Iteration 91: time=10.89, ELBO=3517017.27, deltaELBO=585.086 (0.00044161%), Factors=30
Iteration 92: time=10.74, Factors=30
Iteration 93: time=10.80, Factors=30
Iteration 94: time=10.42, Factors=30
Iteration 95: time=10.34, Factors=30
Iteration 96: time=10.52, ELBO=3517487.35, deltaELBO=470.086 (0.00035481%), Factors=30
Iteration 97: time=10.48, Factors=30
Iteration 98: time=10.64, Factors=30
Iteration 99: time=10.69, Factors=30
Iteration 100: time=10.42, Factors=30
Iteration 101: time=10.42, ELBO=3517868.74, deltaELBO=381.383 (0.00028786%), Factors=30
Iteration 102: time=10.39, Factors=30
Iteration 103: time=10.41, Factors=30
Iteration 104: time=10.43, Factors=30
Iteration 105: time=10.20, Factors=30
Iteration 106: time=10.37, ELBO=3518189.35, deltaELBO=320.618 (0.00024200%), Factors=30
Iteration 107: time=10.19, Factors=30
Iteration 108: time=10.51, Factors=30
Iteration 109: time=10.40, Factors=30
Iteration 110: time=10.71, Factors=30
Iteration 111: time=10.72, ELBO=3518481.19, deltaELBO=291.833 (0.00022027%), Factors=30
Iteration 112: time=10.58, Factors=30
Iteration 113: time=10.62, Factors=30
Iteration 114: time=10.55, Factors=30
Iteration 115: time=10.76, Factors=30
Iteration 116: time=10.40, ELBO=3518755.40, deltaELBO=274.218 (0.00020697%), Factors=30
Iteration 117: time=10.46, Factors=30
Iteration 118: time=10.47, Factors=30
Iteration 119: time=10.38, Factors=30
Iteration 120: time=10.36, Factors=30
Iteration 121: time=10.34, ELBO=3519004.66, deltaELBO=249.255 (0.00018813%), Factors=30
Iteration 122: time=10.34, Factors=30
Iteration 123: time=10.38, Factors=30
Iteration 124: time=10.21, Factors=30
Iteration 125: time=10.25, Factors=30
Iteration 126: time=10.23, ELBO=3519230.34, deltaELBO=225.680 (0.00017034%), Factors=30
Iteration 127: time=10.22, Factors=30
Iteration 128: time=10.34, Factors=30
Iteration 129: time=10.55, Factors=30
Iteration 130: time=10.66, Factors=30
Iteration 131: time=10.72, ELBO=3519455.12, deltaELBO=224.777 (0.00016966%), Factors=30
Iteration 132: time=10.69, Factors=30
Iteration 133: time=10.58, Factors=30
Iteration 134: time=10.55, Factors=30
Iteration 135: time=11.06, Factors=30
Iteration 136: time=10.72, ELBO=3519672.23, deltaELBO=217.114 (0.00016387%), Factors=30
Iteration 137: time=10.53, Factors=30
Iteration 138: time=10.68, Factors=30
Iteration 139: time=10.47, Factors=30
Iteration 140: time=10.36, Factors=30
Iteration 141: time=10.61, ELBO=3519885.70, deltaELBO=213.475 (0.00016113%), Factors=30
Iteration 142: time=10.64, Factors=30
Iteration 143: time=10.38, Factors=30
Iteration 144: time=10.35, Factors=30
Iteration 145: time=10.29, Factors=30
Iteration 146: time=10.41, ELBO=3520095.28, deltaELBO=209.580 (0.00015819%), Factors=30
Iteration 147: time=10.49, Factors=30
Iteration 148: time=10.45, Factors=30
Iteration 149: time=10.62, Factors=30
Iteration 150: time=10.42, Factors=30
Iteration 151: time=10.47, ELBO=3520303.13, deltaELBO=207.844 (0.00015688%), Factors=30
Iteration 152: time=10.44, Factors=30
Iteration 153: time=10.27, Factors=30
Iteration 154: time=10.17, Factors=30
Iteration 155: time=10.00, Factors=30
Iteration 156: time=10.29, ELBO=3520517.97, deltaELBO=214.838 (0.00016216%), Factors=30
Iteration 157: time=10.36, Factors=30
Iteration 158: time=10.39, Factors=30
Iteration 159: time=10.17, Factors=30
Iteration 160: time=10.42, Factors=30
Iteration 161: time=10.61, ELBO=3520733.62, deltaELBO=215.657 (0.00016277%), Factors=30
Iteration 162: time=10.45, Factors=30
Iteration 163: time=10.54, Factors=30
Iteration 164: time=10.34, Factors=30
Iteration 165: time=10.41, Factors=30
Iteration 166: time=10.34, ELBO=3520962.54, deltaELBO=228.911 (0.00017278%), Factors=30
Iteration 167: time=10.39, Factors=30
Iteration 168: time=10.35, Factors=30
Iteration 169: time=10.52, Factors=30
Iteration 170: time=10.59, Factors=30
Iteration 171: time=10.50, ELBO=3521187.99, deltaELBO=225.453 (0.00017017%), Factors=30
Iteration 172: time=10.28, Factors=30
Iteration 173: time=10.04, Factors=30
Iteration 174: time=10.15, Factors=30
Iteration 175: time=10.20, Factors=30
Iteration 176: time=10.07, ELBO=3521417.84, deltaELBO=229.853 (0.00017349%), Factors=30
Iteration 177: time=10.47, Factors=30
Iteration 178: time=10.57, Factors=30
Iteration 179: time=10.58, Factors=30
Iteration 180: time=10.52, Factors=30
Iteration 181: time=10.58, ELBO=3521649.14, deltaELBO=231.295 (0.00017458%), Factors=30
Iteration 182: time=10.55, Factors=30
Iteration 183: time=10.52, Factors=30
Iteration 184: time=10.22, Factors=30
Iteration 185: time=10.33, Factors=30
Iteration 186: time=10.51, ELBO=3521884.19, deltaELBO=235.055 (0.00017741%), Factors=30
Iteration 187: time=10.52, Factors=30
Iteration 188: time=10.28, Factors=30
Iteration 189: time=10.74, Factors=30
Iteration 190: time=10.70, Factors=30
Iteration 191: time=10.61, ELBO=3522087.38, deltaELBO=203.188 (0.00015336%), Factors=30
Iteration 192: time=10.65, Factors=30
Iteration 193: time=10.46, Factors=30
Iteration 194: time=10.35, Factors=30
Iteration 195: time=10.38, Factors=30
Iteration 196: time=10.47, ELBO=3522249.39, deltaELBO=162.006 (0.00012228%), Factors=30
Iteration 197: time=10.40, Factors=30
Iteration 198: time=10.47, Factors=30
Iteration 199: time=10.51, Factors=30
Iteration 200: time=10.60, Factors=30
Iteration 201: time=10.65, ELBO=3522388.87, deltaELBO=139.489 (0.00010528%), Factors=30
Iteration 202: time=10.53, Factors=30
Iteration 203: time=10.54, Factors=30
Iteration 204: time=10.51, Factors=30
Iteration 205: time=10.55, Factors=30
Iteration 206: time=10.65, ELBO=3522510.83, deltaELBO=121.954 (0.00009205%), Factors=30
Iteration 207: time=10.67, Factors=30
Iteration 208: time=10.70, Factors=30
Iteration 209: time=10.68, Factors=30
Iteration 210: time=10.51, Factors=30
Iteration 211: time=10.60, ELBO=3522625.47, deltaELBO=114.645 (0.00008653%), Factors=30
Iteration 212: time=10.78, Factors=30
Iteration 213: time=11.08, Factors=30
Iteration 214: time=10.73, Factors=30
Iteration 215: time=10.64, Factors=30
Iteration 216: time=10.69, ELBO=3522740.24, deltaELBO=114.764 (0.00008662%), Factors=30
Iteration 217: time=10.66, Factors=30
Iteration 218: time=10.60, Factors=30
Iteration 219: time=10.58, Factors=30
Iteration 220: time=10.53, Factors=30
Iteration 221: time=10.79, ELBO=3522844.27, deltaELBO=104.034 (0.00007852%), Factors=30
Iteration 222: time=10.71, Factors=30
Iteration 223: time=10.73, Factors=30
Iteration 224: time=10.63, Factors=30
Iteration 225: time=10.65, Factors=30
Iteration 226: time=10.58, ELBO=3522950.33, deltaELBO=106.061 (0.00008005%), Factors=30
Iteration 227: time=10.62, Factors=30
Iteration 228: time=10.53, Factors=30
Iteration 229: time=10.62, Factors=30
Iteration 230: time=10.61, Factors=30
Iteration 231: time=10.69, ELBO=3523051.08, deltaELBO=100.746 (0.00007604%), Factors=30
Iteration 232: time=10.55, Factors=30
Iteration 233: time=10.60, Factors=30
Iteration 234: time=10.55, Factors=30
Iteration 235: time=10.70, Factors=30
Iteration 236: time=10.78, ELBO=3523147.03, deltaELBO=95.951 (0.00007242%), Factors=30
Iteration 237: time=10.70, Factors=30
Iteration 238: time=10.51, Factors=30
Iteration 239: time=10.56, Factors=30
Iteration 240: time=10.53, Factors=30
Iteration 241: time=10.52, ELBO=3523240.55, deltaELBO=93.521 (0.00007059%), Factors=30
Iteration 242: time=10.56, Factors=30
Iteration 243: time=10.58, Factors=30
Iteration 244: time=10.80, Factors=30
Iteration 245: time=10.48, Factors=30
Iteration 246: time=10.65, ELBO=3523339.09, deltaELBO=98.544 (0.00007438%), Factors=30
Iteration 247: time=10.50, Factors=30
Iteration 248: time=10.52, Factors=30
Iteration 249: time=10.75, Factors=30
Iteration 250: time=10.79, Factors=30
Iteration 251: time=10.79, ELBO=3523433.87, deltaELBO=94.778 (0.00007154%), Factors=30
Iteration 252: time=10.62, Factors=30
Iteration 253: time=10.58, Factors=30
Iteration 254: time=10.55, Factors=30
Iteration 255: time=10.62, Factors=30
Iteration 256: time=10.50, ELBO=3523517.86, deltaELBO=83.987 (0.00006339%), Factors=30
Iteration 257: time=10.59, Factors=30
Iteration 258: time=10.49, Factors=30
Iteration 259: time=10.29, Factors=30
Iteration 260: time=10.57, Factors=30
Iteration 261: time=10.57, ELBO=3523587.97, deltaELBO=70.112 (0.00005292%), Factors=30
Iteration 262: time=10.75, Factors=30
Iteration 263: time=10.73, Factors=30
Iteration 264: time=10.57, Factors=30
Iteration 265: time=10.46, Factors=30
Iteration 266: time=10.62, ELBO=3523656.07, deltaELBO=68.095 (0.00005140%), Factors=30
Iteration 267: time=10.70, Factors=30
Iteration 268: time=10.76, Factors=30
Iteration 269: time=10.73, Factors=30
Iteration 270: time=10.67, Factors=30
Iteration 271: time=10.84, ELBO=3523726.93, deltaELBO=70.859 (0.00005348%), Factors=30
Iteration 272: time=10.42, Factors=30
Iteration 273: time=10.10, Factors=30
Iteration 274: time=10.24, Factors=30
Iteration 275: time=10.39, Factors=30
Iteration 276: time=10.56, ELBO=3523794.04, deltaELBO=67.113 (0.00005066%), Factors=30
Iteration 277: time=10.57, Factors=30
Iteration 278: time=10.63, Factors=30
Iteration 279: time=10.59, Factors=30
Iteration 280: time=10.55, Factors=30
Iteration 281: time=10.59, ELBO=3523861.29, deltaELBO=67.255 (0.00005076%), Factors=30
Iteration 282: time=10.57, Factors=30
Iteration 283: time=10.61, Factors=30
Iteration 284: time=10.50, Factors=30
Iteration 285: time=10.60, Factors=30
Iteration 286: time=10.65, ELBO=3523930.38, deltaELBO=69.090 (0.00005215%), Factors=30
Iteration 287: time=10.55, Factors=30
Iteration 288: time=10.34, Factors=30
Iteration 289: time=10.69, Factors=30
Iteration 290: time=10.50, Factors=30
Iteration 291: time=10.47, ELBO=3523995.62, deltaELBO=65.239 (0.00004924%), Factors=30
Iteration 292: time=10.39, Factors=30
Iteration 293: time=10.44, Factors=30
Iteration 294: time=10.66, Factors=30
Iteration 295: time=10.79, Factors=30
Iteration 296: time=10.74, ELBO=3524060.45, deltaELBO=64.828 (0.00004893%), Factors=30
Converged!
#######################
## Training finished ##
#######################
Warning: Output file /nfs/team205/ed6/data/Fetal_immune/LMM_data/LMM_input_LYMPHOID_PBULK/LYMPHOID_mofa_model_oneview_organCorrected.hdf5 already exists, it will be replaced
Saving model in /nfs/team205/ed6/data/Fetal_immune/LMM_data/LMM_input_LYMPHOID_PBULK/LYMPHOID_mofa_model_oneview_organCorrected.hdf5...
10 factors were found to explain no variance and they were removed for downstream analysis. You can disable this option by setting load_model(..., remove_inactive_factors = FALSE)
Error in .quality_control(object, verbose = verbose) :
!duplicated(unlist(samples_names(object))) are not all TRUE
### Tweaking the MOFA2 loading function because the quality control complains
### Tweaked copy of MOFA2::load_model ----------------------------------------
### The stock loader errors in .quality_control() because sample names are
### duplicated across groups in this pseudobulk model, so the final QC step is
### skipped here (see note at the bottom). While copying, two upstream bugs
### are also fixed:
###   * training_stats was read from the HDF5 file twice
###   * the default feature-name template used an invalid sprintf format
###     ("&d", plus a character vector fed to %d) and would error whenever
###     feature names were missing from the file
### The external interface is identical to MOFA2::load_model().
load_model <- function(file, sort_factors = TRUE, on_disk = FALSE, load_data = TRUE,
                       remove_outliers = FALSE, remove_inactive_factors = TRUE, verbose = FALSE,
                       load_interpol_Z = FALSE) {
  # Create new MOFA model object
  object <- new("MOFA")
  object@status <- "trained"

  # Keep data on disk (HDF5-backed DelayedArrays) or load into memory
  object@on_disk <- isTRUE(on_disk)

  # Index of the groups/datasets available in the hdf5 file
  h5ls.out <- h5ls(file, datasetinfo = FALSE)

  ########################
  ## Load training data ##
  ########################

  # Load view/group/feature/sample names
  if ("views" %in% h5ls.out$name) {
    view_names <- as.character( h5read(file, "views")[[1]] )
    group_names <- as.character( h5read(file, "groups")[[1]] )
    feature_names <- h5read(file, "features")[view_names]
    sample_names <- h5read(file, "samples")[group_names]
  } else { # for old models
    feature_names <- h5read(file, "features")
    sample_names <- h5read(file, "samples")
    view_names <- names(feature_names)
    group_names <- names(sample_names)
    h5ls.out <- h5ls.out[grep("variance_explained", h5ls.out$name, invert = TRUE), ]
  }
  # Covariate names are loaded for interface parity, although the covariate
  # sections below are disabled in this copy
  if ("covariates" %in% h5ls.out$name) {
    covariate_names <- as.character( h5read(file, "covariates")[[1]] )
  } else {
    covariate_names <- NULL
  }

  # Load training data as a nested list of matrices: data[[view]][[group]]
  data <- list(); intercepts <- list()
  if (load_data && "data" %in% h5ls.out$name) {
    object@data_options[["loaded"]] <- TRUE
    if (verbose) message("Loading data...")
    for (m in view_names) {
      data[[m]] <- list()
      intercepts[[m]] <- list()
      for (g in group_names) {
        if (on_disk) {
          # as DelayedArrays
          data[[m]][[g]] <- DelayedArray::DelayedArray( HDF5ArraySeed(file, name = sprintf("data/%s/%s", m, g)) )
        } else {
          # as matrices; intercepts may be absent in old files, hence tryCatch
          data[[m]][[g]] <- h5read(file, sprintf("data/%s/%s", m, g))
          tryCatch(intercepts[[m]][[g]] <- as.numeric( h5read(file, sprintf("intercepts/%s/%s", m, g)) ), error = function(e) { NULL })
        }
        # Replace NaN by NA
        data[[m]][[g]][is.nan(data[[m]][[g]])] <- NA # this realised into memory, TO FIX
      }
    }
  } else {
    # Create empty placeholders with the correct dimension names so that
    # downstream dimension/name assignment still works without the data
    object@data_options[["loaded"]] <- FALSE
    for (m in view_names) {
      data[[m]] <- list()
      for (g in group_names) {
        data[[m]][[g]] <- .create_matrix_placeholder(rownames = feature_names[[m]], colnames = sample_names[[g]])
      }
    }
  }
  object@data <- data
  object@intercepts <- intercepts

  # Load sample/feature metadata if present
  if ("samples_metadata" %in% h5ls.out$name) {
    object@samples_metadata <- bind_rows(lapply(group_names, function(g) as.data.frame(h5read(file, sprintf("samples_metadata/%s", g)))))
  }
  if ("features_metadata" %in% h5ls.out$name) {
    object@features_metadata <- bind_rows(lapply(view_names, function(m) as.data.frame(h5read(file, sprintf("features_metadata/%s", m)))))
  }

  # NOTE: the upstream sections loading MEFISTO sample covariates
  # (cov_samples / cov_samples_transformed) and interpolated factor values
  # (Z_predictions) are disabled in this copy -- they are not used for this
  # model. load_interpol_Z is kept in the signature for interface parity.

  #######################
  ## Load expectations ##
  #######################

  expectations <- list()
  node_names <- h5ls.out[h5ls.out$group == "/expectations", "name"]
  if (verbose) message(paste0("Loading expectations for ", length(node_names), " nodes..."))
  if ("AlphaW" %in% node_names)
    expectations[["AlphaW"]] <- h5read(file, "expectations/AlphaW")[view_names]
  if ("AlphaZ" %in% node_names)
    expectations[["AlphaZ"]] <- h5read(file, "expectations/AlphaZ")[group_names]
  if ("Sigma" %in% node_names)
    expectations[["Sigma"]] <- h5read(file, "expectations/Sigma")
  if ("Z" %in% node_names)
    expectations[["Z"]] <- h5read(file, "expectations/Z")[group_names]
  if ("W" %in% node_names)
    expectations[["W"]] <- h5read(file, "expectations/W")[view_names]
  if ("ThetaW" %in% node_names)
    expectations[["ThetaW"]] <- h5read(file, "expectations/ThetaW")[view_names]
  if ("ThetaZ" %in% node_names)
    expectations[["ThetaZ"]] <- h5read(file, "expectations/ThetaZ")[group_names]
  # Tau is intentionally not loaded (large and unused downstream)
  object@expectations <- expectations

  ########################
  ## Load model options ##
  ########################

  if (verbose) message("Loading model options...")
  tryCatch( {
    object@model_options <- as.list(h5read(file, 'model_options', read.attributes = TRUE))
  }, error = function(x) { print("Model options not found, not loading it...") })
  # Convert "True"/"False" strings (written by mofapy2) to logical values
  for (i in names(object@model_options)) {
    if (object@model_options[[i]] == "False" || object@model_options[[i]] == "True") {
      object@model_options[[i]] <- as.logical(object@model_options[[i]])
    }
  }

  ##########################################
  ## Load training options and statistics ##
  ##########################################

  if (verbose) message("Loading training options and statistics...")
  # Load training options (unless already set)
  if (length(object@training_options) == 0) {
    tryCatch( {
      object@training_options <- as.list(h5read(file, 'training_opts', read.attributes = TRUE))
    }, error = function(x) { print("Training opts not found, not loading it...") })
  }
  # Load training statistics (bug fix: the original read this twice)
  tryCatch( {
    object@training_stats <- h5read(file, 'training_stats', read.attributes = TRUE)
  }, error = function(x) { print("Training stats not found, not loading it...") })

  # NOTE: the upstream section loading MEFISTO covariate options
  # (smooth_opts) is disabled in this copy.

  #######################################
  ## Load variance explained estimates ##
  #######################################

  if ("variance_explained" %in% h5ls.out$name) {
    r2_list <- list(
      r2_total = h5read(file, "variance_explained/r2_total")[group_names],
      r2_per_factor = h5read(file, "variance_explained/r2_per_factor")[group_names]
    )
    object@cache[["variance_explained"]] <- r2_list
  }

  # Hack to fix models where variance explained values were stored as
  # fractions (0-1) rather than percentages (0-100): rescale when max < 1.
  # Guarded against the case where no estimates were stored in the file
  # (the original would error on max() over an empty/NULL cache entry).
  if (!is.null(object@cache[["variance_explained"]]) &&
      max(sapply(object@cache$variance_explained$r2_total, max, na.rm = TRUE), na.rm = TRUE) < 1) {
    for (m in seq_along(view_names)) {
      for (g in seq_along(group_names)) {
        object@cache$variance_explained$r2_total[[g]][[m]] <- 100 * object@cache$variance_explained$r2_total[[g]][[m]]
        object@cache$variance_explained$r2_per_factor[[g]][, m] <- 100 * object@cache$variance_explained$r2_per_factor[[g]][, m]
      }
    }
  }

  ##############################
  ## Specify dimensionalities ##
  ##############################

  object@dimensions[["M"]] <- length(data)                               # number of views
  object@dimensions[["G"]] <- length(data[[1]])                          # number of groups
  object@dimensions[["N"]] <- sapply(data[[1]], ncol)                    # number of samples (per group)
  object@dimensions[["D"]] <- sapply(data, function(e) nrow(e[[1]]))     # number of features (per view)
  object@dimensions[["K"]] <- ncol(object@expectations$Z[[1]])           # number of factors

  # Assign sample and feature names (slow for large matrices)
  if (verbose) message("Assigning names to the different dimensions...")

  # Create default feature names if they are missing from the file
  # (bug fix: upstream used sprintf("feature%d_view_&d", <character>, m),
  # an invalid format string that would error)
  if (is.null(feature_names)) {
    print("Features names not found, generating default: feature1_view1, ..., featureD_viewM")
    feature_names <- lapply(seq_len(object@dimensions[["M"]]),
                            function(m) sprintf("feature%d_view%d", seq_len(object@dimensions[["D"]][m]), m))
  } else {
    # Disambiguate feature names duplicated across views by suffixing the view
    all_names <- unname(unlist(feature_names))
    duplicated_names <- unique(all_names[duplicated(all_names)])
    if (length(duplicated_names) > 0)
      warning("There are duplicated features names across different views. We will add the suffix *_view* only for those features
Example: if you have both TP53 in mRNA and mutation data it will be renamed to TP53_mRNA, TP53_mutation")
    for (m in names(feature_names)) {
      tmp <- which(feature_names[[m]] %in% duplicated_names)
      if (length(tmp) > 0) feature_names[[m]][tmp] <- paste(feature_names[[m]][tmp], m, sep = "_")
    }
  }
  features_names(object) <- feature_names

  # Create default sample names if they are missing from the file
  if (is.null(sample_names)) {
    print("Samples names not found, generating default: sample1, ..., sampleN")
    sample_names <- lapply(object@dimensions[["N"]], function(n) paste0("sample", as.character(seq_len(n))))
  }
  samples_names(object) <- sample_names

  # Set views names
  if (is.null(names(object@data))) {
    print("Views names not found, generating default: view1, ..., viewM")
    view_names <- paste0("view", as.character(seq_len(object@dimensions[["M"]])))
  }
  views_names(object) <- view_names

  # Set groups names
  if (is.null(names(object@data[[1]]))) {
    print("Groups names not found, generating default: group1, ..., groupG")
    group_names <- paste0("group", as.character(seq_len(object@dimensions[["G"]])))
  }
  groups_names(object) <- group_names

  # Set factors names
  factors_names(object) <- paste0("Factor", as.character(seq_len(object@dimensions[["K"]])))

  ###################
  ## Parse factors ##
  ###################

  # Calculate variance explained estimates per factor when absent from cache
  if (is.null(object@cache[["variance_explained"]])) {
    object@cache[["variance_explained"]] <- calculate_variance_explained(object)
  }

  # Remove factors that explain (essentially) no variance in any view/group
  if (remove_inactive_factors) {
    r2 <- rowSums(do.call('cbind', lapply(object@cache[["variance_explained"]]$r2_per_factor, rowSums, na.rm = TRUE)))
    var.threshold <- 0.0001
    if (all(r2 < var.threshold)) {
      warning(sprintf("All %s factors were found to explain little or no variance so remove_inactive_factors option has been disabled.", length(r2)))
    } else if (any(r2 < var.threshold)) {
      object <- subset_factors(object, which(r2 >= var.threshold))
      message(sprintf("%s factors were found to explain no variance and they were removed for downstream analysis. You can disable this option by setting load_model(..., remove_inactive_factors = FALSE)", sum(r2 < var.threshold)))
    }
  }

  # Sort factors by total variance explained (largest first)
  if (sort_factors && object@dimensions$K > 1) {
    if (verbose) message("Re-ordering factors by their variance explained...")
    r2 <- rowSums(sapply(object@cache[["variance_explained"]]$r2_per_factor, function(e) rowSums(e, na.rm = TRUE)))
    order_factors <- c(names(r2)[order(r2, decreasing = TRUE)])
    object <- subset_factors(object, order_factors)
  }

  # Mask outlier samples
  if (remove_outliers) {
    if (verbose) message("Removing outliers...")
    object <- .detect_outliers(object)
  }

  # Intercepts are only meaningful for Gaussian likelihoods: mask the rest
  if (any(object@model_options$likelihoods != "gaussian")) {
    for (m in names(which(object@model_options$likelihoods != "gaussian"))) {
      for (g in names(object@intercepts[[m]])) {
        object@intercepts[[m]][[g]] <- NA
      }
    }
  }

  # NOTE: .quality_control() is deliberately NOT run here -- it rejects models
  # whose sample names are duplicated across groups, which is the case for
  # this pseudobulk model (the reason this tweaked copy exists).
  return(object)
}
mofa_trained <- load_model(outfile)
Recalculating total variance explained values (r2_total)...
10 factors were found to explain no variance and they were removed for downstream analysis. You can disable this option by setting load_model(..., remove_inactive_factors = FALSE)
samples_names(mofa_trained) <- samples_names(object)
samples_metadata(mofa_trained)
rownames(samples_metadata(mofa_trained)) <- samples_metadata(mofa_trained)[["sample"]]
plot_variance_explained(mofa_trained, x='factor', y='group', split_by = 'view', plot_total = TRUE, max_r2 = 50)[[1]] +
theme(axis.text.x = element_text(angle=45, hjust=1))
# Horizontal barplot of variance explained per group (cell type).
# NOTE(review): element [[2]] of the get_variance_explained() result is
# assumed here to be the per-group totals (r2_total), since it is plotted
# against `group` only -- confirm against the function's return order.
get_variance_explained(mofa_trained, as.data.frame = TRUE)[[2]] %>%
ggplot(aes(group, value)) +
geom_col() +
coord_flip() +
ylab("Var. (%)") +
theme_classic(base_size=14)
Plot by celltype
# Per-factor variance explained, one barplot panel per group (cell type),
# laid out in 6 columns with free x scales.
# NOTE(review): element [[1]] of the get_variance_explained() result is
# assumed here to be the per-factor table (r2_per_factor), since it is
# plotted against `factor` -- confirm against the function's return order.
get_variance_explained(mofa_trained, as.data.frame = TRUE)[[1]] %>%
ggplot(aes(factor, value)) + geom_col() +
coord_flip() +
facet_wrap(group~., ncol = 6, scales = "free_x")
plot_factor_cor(mofa_trained, method = "spearman")
## Correlation with principal components
pcs <- reducedDim(sce)
fctrs <- get_factors(mofa_trained) %>%
purrr::reduce(rbind)
corrplot::corrplot(cor(pcs, fctrs[rownames(pcs),]))
# Save a factor "identity card" figure (save_factor_id, defined elsewhere in
# this notebook) for every factor in the trained model, into figdir.
# seq_len() replaces 1:K, which would wrongly iterate c(1, 0) if K were 0.
for (f in seq_len(mofa_trained@dimensions$K)){
print(paste0("Saving ID for Factor ", f, "..."))
save_factor_id(mofa_trained, f=f, figdir = figdir)
}
[1] "Saving ID for Factor 1..."
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
[1] "Saving ID for Factor 2..."
[1] "Saving ID for Factor 3..."
[1] "Saving ID for Factor 4..."
[1] "Saving ID for Factor 5..."
[1] "Saving ID for Factor 6..."
[1] "Saving ID for Factor 7..."
[1] "Saving ID for Factor 8..."
[1] "Saving ID for Factor 9..."
[1] "Saving ID for Factor 10..."
[1] "Saving ID for Factor 11..."
[1] "Saving ID for Factor 12..."
[1] "Saving ID for Factor 13..."
[1] "Saving ID for Factor 14..."
[1] "Saving ID for Factor 15..."
[1] "Saving ID for Factor 16..."
[1] "Saving ID for Factor 17..."
[1] "Saving ID for Factor 18..."
[1] "Saving ID for Factor 19..."
[1] "Saving ID for Factor 20..."
plot_ct_KNN_graph(knn, color_by = 'Factor5')
Using `stress` as default layout
connectivity_test_ls <- lapply(all_groups, function(g) test_conn_group(mofa_trained, g))
```r
connectivity_test_df %>%
group_by(group) %>%
mutate(mean_val=median(score)) %>%
ungroup() %>%
arrange(-mean_val) %>%
mutate(group=factor(group, levels=unique(group))) %>%
ggplot(aes(organ, log1p(score))) +
geom_col(fill="grey") +
geom_col(data=. %>% filter(is_signif), aes(fill=organ)) +
scale_fill_manual(values=org_colors) +
coord_flip() +
facet_grid(group~.) +
theme(strip.text.y = element_text(angle=0))
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
#### Expression of top R2 factors
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuZm9yIChnIGluIGFsbF9ncm91cHMpe1xuICBmcyA8LSBnZXRfdG9wX2ZhY3Rvcl9wZXJfY2VsbHR5cGUobW9mYV90cmFpbmVkLCBnLCBtaW5fUjI9MylcbiAgdG9wX3Bsb3RzIDwtIGxhcHBseShmcywgZnVuY3Rpb24oeCkgKHBsb3RfZGF0YV90b3Bfd2VpZ2h0cyhtb2ZhX3RyYWluZWQsIGcsIHgsIHdoaWNoPVwidG9wXCIpICsgcmVtb3ZlX3hfYXhpcygpKSAvICBcbiAgICAgICAgICAgICAgICAgICAgICAgIHBsb3RfZGF0YV90b3Bfd2VpZ2h0cyhtb2ZhX3RyYWluZWQsIGcsIHgsIHdoaWNoPVwiYm90dG9tXCIpICsgZ2d0aXRsZShcIlwiKVxuICApXG4gIGZ1bGxfcGwgPC13cmFwX3Bsb3RzKHRvcF9wbG90cywgbmNvbD0xKSBcbiAgZ2dzYXZlKGdsdWUoXCJ7ZmlnZGlyfS90b3BfZmFjdG9yc19leHByX3tnfS5wZGZcIikscGxvdD1mdWxsX3BsLCAgd2lkdGg9MTIsIGhlaWdodCA9IDcqbGVuZ3RoKHRvcF9wbG90cykpXG59XG5cbmBgYCJ9 -->
```r
# For each cell-type group, pick the factors explaining at least 3% variance
# (min_R2=3), plot the genes with the largest positive ("top") and negative
# ("bottom") weights for each factor -- stacked vertically with patchwork's
# `/` operator -- and save one PDF per group, with page height scaled to the
# number of factors plotted.
for (g in all_groups){
fs <- get_top_factor_per_celltype(mofa_trained, g, min_R2=3)
top_plots <- lapply(fs, function(x) (plot_data_top_weights(mofa_trained, g, x, which="top") + remove_x_axis()) /
plot_data_top_weights(mofa_trained, g, x, which="bottom") + ggtitle("")
)
# wrap_plots stacks all per-factor panels into a single column
full_pl <-wrap_plots(top_plots, ncol=1)
ggsave(glue("{figdir}/top_factors_expr_{g}.pdf"),plot=full_pl, width=12, height = 7*length(top_plots))
}
Joining, by = "sample"
Joining, by = "sample"
Joining, by = "sample"
Joining, by = "sample"
Joining, by = "sample"
Joining, by = "sample"
Joining, by = "sample"
Joining, by = "sample"
Joining, by = "sample"
Joining, by = "sample"
Joining, by = "sample"
Joining, by = "sample"
```r
# BiocManager::install("MOFAdata")
library(MOFAdata)
utils::data(reactomeGS)
head(rownames(reactomeGS))
## Remove row with NA
reactomeGS <- reactomeGS[!is.na(rownames(reactomeGS)),]
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxubGlicmFyeShFbnNEYi5Ic2FwaWVucy52ODYpXG5oZy5wYWlycyA8LSByZWFkUkRTKHN5c3RlbS5maWxlKFxcZXhkYXRhXFwsIFxcaHVtYW5fY3ljbGVfbWFya2Vycy5yZHNcXCwgcGFja2FnZT1cXHNjcmFuXFwpKVxuYWxsX2dlbmVzIDwtIGVuc2VtYmxkYjo6Z2VuZXMoRW5zRGIuSHNhcGllbnMudjg2KVxuZGV0YWNoKHBhY2thZ2U6RW5zRGIuSHNhcGllbnMudjg2KVxuZGV0YWNoKHBhY2thZ2U6ZW5zZW1ibGRiKVxuXG4jIGdlbmVfbmFtZV8yX2lkIDwtIGZ1bmN0aW9uKGdlbmUpe1xuIyAgICByZXR1cm4oYWxsX2dlbmVzW2FsbF9nZW5lcyRnZW5lX25hbWU9PWdlbmUsXSRnZW5lX2lkWzFdKVxuIyB9XG4jIFxuIyBnZW5lX2lkcyA8LSBzYXBwbHkobW9mYV90cmFpbmVkQGZlYXR1cmVzX21ldGFkYXRhJGZlYXR1cmUsIGdlbmVfbmFtZV8yX2lkKVxuIyByb3dEYXRhKHNjZSlbXFxnZW5lX2lkXFxdIDwtIGdlbmVfaWRzXG4jIHJvd0RhdGEoc2NlKVtcXGdlbmVfbmFtZVxcXSA8LSByb3duYW1lcyhzY2UpXG5cbmdlbmVfbmFtZXNfcmVhY3RvbWUgPC0gYWxsX2dlbmVzW2NvbG5hbWVzKHJlYWN0b21lR1MpXSRnZW5lX25hbWVcbmNvbG5hbWVzKHJlYWN0b21lR1MpIDwtIGdlbmVfbmFtZXNfcmVhY3RvbWVcbmBgYFxuYGBgIn0= -->
```r
```r
library(EnsDb.Hsapiens.v86)
hg.pairs <- readRDS(system.file("exdata", "human_cycle_markers.rds", package="scran"))
all_genes <- ensembldb::genes(EnsDb.Hsapiens.v86)
detach(package:EnsDb.Hsapiens.v86)
detach(package:ensembldb)
# gene_name_2_id <- function(gene){
# return(all_genes[all_genes$gene_name==gene,]$gene_id[1])
# }
#
# gene_ids <- sapply(mofa_trained@features_metadata$feature, gene_name_2_id)
# rowData(sce)["gene_id"] <- gene_ids
# rowData(sce)["gene_name"] <- rownames(sce)
gene_names_reactome <- all_genes[colnames(reactomeGS)]$gene_name
colnames(reactomeGS) <- gene_names_reactome
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
Subset to genes tested
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxucmVhY3RvbWVHU191bml2ZXJzZSA8LSByZWFjdG9tZUdTWywgY29sbmFtZXMocmVhY3RvbWVHUykgJWluJSBtb2ZhX3RyYWluZWRAZmVhdHVyZXNfbWV0YWRhdGEkZmVhdHVyZV1cbmBgYFxuYGBgIn0= -->
```r
```r
reactomeGS_universe <- reactomeGS[, colnames(reactomeGS) %in% mofa_trained@features_metadata$feature]
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuIyBHU0VBIG9uIHBvc2l0aXZlIHdlaWdodHMsIHdpdGggZGVmYXVsdCBvcHRpb25zXG5yZXMucG9zaXRpdmUgPC0gcnVuX2VucmljaG1lbnQobW9mYV90cmFpbmVkLFxuICB2aWV3PSdzY2FsZWRfbG9nY291bnRzJyxcbiAgIyBzdGF0aXN0aWNhbC50ZXN0ID0gJ2Nvci5hZGoucGFyYW1ldHJpYycsXG4gIGZlYXR1cmUuc2V0cyA9IHJlYWN0b21lR1NfdW5pdmVyc2UsIFxuICBzaWduID0gXFxwb3NpdGl2ZVxcLFxuKVxuXG4jIEdTRUEgb24gbmVnYXRpdmUgd2VpZ2h0cywgd2l0aCBkZWZhdWx0IG9wdGlvbnNcbnJlcy5uZWdhdGl2ZSA8LSBydW5fZW5yaWNobWVudChtb2ZhX3RyYWluZWQsIFxuICB2aWV3PSdzY2FsZWRfbG9nY291bnRzJyxcbiAgIyBzdGF0aXN0aWNhbC50ZXN0ID0gJ2Nvci5hZGoucGFyYW1ldHJpYycsXG4gIGZlYXR1cmUuc2V0cyA9IHJlYWN0b21lR1NfdW5pdmVyc2UsIFxuICBzaWduID0gXFxuZWdhdGl2ZVxcXG4pXG5cblxuZm9yIChmIGluIDE6bW9mYV90cmFpbmVkQGRpbWVuc2lvbnMkSyl7XG4gIGlmIChtaW4ocmVzLnBvc2l0aXZlJHB2YWwuYWRqWyxwYXN0ZTAoXFxGYWN0b3JcXCwgZildKSA8IDAuMSkge1xuICAgIHByaW50KHBsb3RfZW5yaWNobWVudChyZXMucG9zaXRpdmUsIGZhY3RvciA9IGYsIGFscGhhPTAuMSkgKyBnZ3RpdGxlKFxcUG9zaXRpdmUgd2VpZ2h0c1xcKSArXG4gICAgICAgICAgICBwbG90X2VucmljaG1lbnQocmVzLm5lZ2F0aXZlLCBmYWN0b3IgPSBmLCBhbHBoYT0wLjEpICsgZ2d0aXRsZShcXE5lZ2F0aXZlIHdlaWdodHNcXCkgK1xuICAgICAgICAgICAgICBwbG90X2Fubm90YXRpb24odGl0bGU9cGFzdGUwKFxcRmFjdG9yXFwsIGYpKSlcbiAgICAgIH1cbiAgfVxuYGBgXG5gYGAifQ== -->
```r
```r
# GSEA on positive weights, with default options
res.positive <- run_enrichment(mofa_trained,
view='scaled_logcounts',
# statistical.test = 'cor.adj.parametric',
feature.sets = reactomeGS_universe,
sign = "positive"
)
# GSEA on negative weights, with default options
res.negative <- run_enrichment(mofa_trained,
view='scaled_logcounts',
# statistical.test = 'cor.adj.parametric',
feature.sets = reactomeGS_universe,
sign = "negative"
)
for (f in 1:mofa_trained@dimensions$K){
if (min(res.positive$pval.adj[,paste0("Factor", f)]) < 0.1) {
print(plot_enrichment(res.positive, factor = f, alpha=0.1) + ggtitle("Positive weights") +
plot_enrichment(res.negative, factor = f, alpha=0.1) + ggtitle("Negative weights") +
plot_annotation(title=paste0("Factor", f)))
}
}
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuc2lnbmlmX3BhdGh3YXlzIDwtIHJvd25hbWVzKGRhdGEuZnJhbWUocmVzLm5lZ2F0aXZlJHB2YWwuYWRqKSlbb3JkZXIoZGF0YS5mcmFtZShyZXMubmVnYXRpdmUkcHZhbC5hZGopW1tcXEZhY3RvcjhcXF1dKVswOjEwXV1cbmNvbG5hbWVzKHJlYWN0b21lR1NfdW5pdmVyc2UpW3JlYWN0b21lR1NfdW5pdmVyc2Vbc2lnbmlmX3BhdGh3YXlzWzVdLF09PTFdXG5wbG90X2VucmljaG1lbnRfZGV0YWlsZWQocmVzLm5lZ2F0aXZlLCBmYWN0b3IgPSA4KVxuYGBgXG5gYGAifQ== -->
```r
```r
signif_pathways <- rownames(data.frame(res.negative$pval.adj))[order(data.frame(res.negative$pval.adj)[["Factor8"]])[1:10]]
colnames(reactomeGS_universe)[reactomeGS_universe[signif_pathways[5],]==1]
plot_enrichment_detailed(res.negative, factor = 8)
```
–> –> –> –> –> –> –> –> –> –> –> –> –> –> –> –> –>
–> –> –> –> –>
–> –> –>